# setup logfile
log <- file(snakemake@log[[1]], open = "wt")
sink(log)
sink(log, type = "message")
# don't print warnings and messages in the final document
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
require(data.table)
## Warning: package 'data.table' was built under R version 4.0.3
require(DESeq2)
## Warning: package 'DESeq2' was built under R version 4.0.3
## Warning: package 'IRanges' was built under R version 4.0.3
## Warning: package 'GenomicRanges' was built under R version 4.0.3
## Warning: package 'GenomeInfoDb' was built under R version 4.0.3
## Warning: package 'SummarizedExperiment' was built under R version 4.0.3
## Warning: package 'MatrixGenerics' was built under R version 4.0.3
## Warning: package 'matrixStats' was built under R version 4.0.3
## Warning: package 'Biobase' was built under R version 4.0.3
if(!("qiime2R" %in% installed.packages()[,"Package"])){
devtools::install_github("jbisanz/qiime2R")
}
require(qiime2R)
require(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.3
require(vegan)
## Warning: package 'vegan' was built under R version 4.0.3
## Warning: package 'lattice' was built under R version 4.0.3
require(phyloseq)
## Warning: package 'phyloseq' was built under R version 4.0.3
require(cowplot)
## Warning: package 'cowplot' was built under R version 4.0.3
require(pheatmap)
require(pals)
The idea of this analysis is to use the DESeq2 framework for inference of indicator species for different (populations,) species and reproductive mode of Hydra microbiomes from Hungary.
otu <- read.csv(snakemake@input[["AbFilt_FT"]], row.names = 1) #"../data/dada2_AbFilt_FT.csv"
tax <- read.csv(snakemake@input[["AbFilt_Tax"]], row.names=1)#"../data/dada2_AbFilt_Tax.csv"
meta <- read.csv(snakemake@input[["AbFilt_meta"]], row.names=1)#"../data/dada2_AbFilt_meta.csv"
tree <- read_qza(snakemake@input[["AbFilt_fasta_tree"]])$data #"../data/dada2_AbFilt_fasta_tree.qza"
# add a count to facilitate log transformations
otu2 <- otu+1
# remap the reproductive data to more simple index
map_vect <- c("SEX","NR","SEX","ASEX","SEX","SEX","SEX","SEX")
names(map_vect) <- unique(meta$ReproductiveMode)
meta[["ReproductiveModeSimple"]] <- map_vect[meta[["ReproductiveMode"]]]
phy <- phyloseq(otu_table(otu, taxa_are_rows = TRUE),
tax_table(as.matrix(tax)),
sample_data(meta),
phy_tree(tree))
dds <- DESeqDataSetFromMatrix(otu2[,rownames(meta)], colData = meta, design = ~ReproductiveModeSimple + Species + PopID)
deseq <- DESeq(dds)
#deseq1 <- DESeq(dds, betaPrior = TRUE)
# get the differentials for Reproductive mode and Species
resultsList <- list()
combSpecies <- combn(unique(colData(deseq)[["Species"]]),2)
combSex <- combn(unique(colData(deseq)[["ReproductiveModeSimple"]]),2)
combSS <- list(Species = combSpecies,ReproductiveModeSimple = combSex)
for(comb in names(combSS)){
combS <- combSS[[comb]]
rslt <- list()
for(i in 1:ncol(combS)){
rslt[[paste(combS[,i], collapse = "_vs_")]]<- results(deseq,
contrast = c(comb,
as.character(combS[1,i]),as.character(combS[2,i])),
alpha=0.05)
}
resultsList[[comb]] <- rslt
}
for(variable in names(resultsList)){
for(comparison in names(resultsList[[variable]])){
print(paste(variable, comparison))
print(summary(resultsList[[variable]][[comparison]]))
}
}
## [1] "Species CIR_vs_OLI"
##
## out of 1854 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 16, 0.86%
## LFC < 0 (down) : 56, 3%
## outliers [1] : 76, 4.1%
## low counts [2] : 719, 39%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Species CIR_vs_VUL"
##
## out of 1854 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 14, 0.76%
## LFC < 0 (down) : 37, 2%
## outliers [1] : 76, 4.1%
## low counts [2] : 719, 39%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Species OLI_vs_VUL"
##
## out of 1854 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 43, 2.3%
## LFC < 0 (down) : 35, 1.9%
## outliers [1] : 76, 4.1%
## low counts [2] : 468, 25%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple SEX_vs_NR"
##
## out of 1854 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 24, 1.3%
## LFC < 0 (down) : 38, 2%
## outliers [1] : 76, 4.1%
## low counts [2] : 539, 29%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple SEX_vs_ASEX"
##
## out of 1854 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 62, 3.3%
## LFC < 0 (down) : 12, 0.65%
## outliers [1] : 76, 4.1%
## low counts [2] : 396, 21%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple NR_vs_ASEX"
##
## out of 1854 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 72, 3.9%
## LFC < 0 (down) : 4, 0.22%
## outliers [1] : 76, 4.1%
## low counts [2] : 216, 12%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
#resultsPopID <- list()
#compPopID <- grep("PopID", resultsNames(deseq1), value = TRUE)
#for(i in 1:length(compPopID)){
# resultsPopID[[compPopID[i]]] <- results(deseq1, contrast = list(compPopID[i], compPopID[-i]))
#}
difESV <- list()
for(n in names(resultsList)){
for(comp in resultsList[[n]]){
compSel <- comp$padj < 0.05 & !is.na(comp$padj)
difESV[[n]] <- unique(c(difESV[[n]], rownames(comp[compSel,])))
}
}
In total there are 133 and 164 differentially abundant ESV between Species and Reproductive mode, respectively. Lets visualize that and plot the fold changes for each of them in the different comparisons.
SpeciesSexPlot <- list()
resVals <- list()
for(cdt in names(resultsList)){
cdtPlots <- list()
res <- list()
for(comp in names(resultsList[[cdt]])){
compR <- as.data.frame(resultsList[[cdt]][[comp]])
fl <- merge(compR, tax[,-8], by = 0)
fl <- fl[fl[["padj"]] < 0.05 & !is.na(fl[["padj"]]),]
#fl <- fl[order(abs(fl[["log2FoldChange"]]))[1:20],]
fl[["ESV"]] <- sapply(fl[["Row.names"]], function(x) {paste(collapse="", strsplit(as.character(x), split ="")[[1]][1:6])})
xlb <- paste(collapse = " ", strsplit(comp, split = "_")[[1]][c(3,2,1)])
pp <- ggplot(fl, aes(x=log2FoldChange, y = ESV, fill = Phylum)) +
geom_bar(stat ="identity") +
geom_errorbar(aes(xmin = log2FoldChange - lfcSE, xmax = log2FoldChange + lfcSE))+
xlab(xlb)
cdtPlots[[comp]] <- pp
res[[comp]] <- fl
}
SpeciesSexPlot[[cdt]] <- cowplot::plot_grid(plotlist = cdtPlots, nrow=1)
resVals[[cdt]] <- res
}
SpeciesSexPlot[[1]]
SpeciesSexPlot[[2]]
Lets check the result also in a heatmap. The OTU matrix is very sparse, thus it might be that the effects are not really visible at this level of resolution.
#resNames <- results(deseq)
#vennList <- list()
#for(n in names(resVals)){
# res <- resVals[[n]]
# lvls <- unique(unlist(strsplit(names(res), split = "_vs_")))
# vennMat <- matrix(0,
# ncol = length(lvls),
# nrow = nrow(resNames),
# dim = list(x= rownames(resNames),
# y = lvls)
# )
#
# for(m in lvls){
vst <- varianceStabilizingTransformation(deseq, fitType='local')
variable <- names(difESV)[1]
# define custom colors
colSpecies <- polychrome(length(unique(meta$Species)))
colPopID <- polychrome(length(unique(meta$PopID)))
colReproductiveModeSimple <- polychrome(length(unique(meta$ReproductiveModeSimple)))
colPhylum <- polychrome(length(unique(tax[difESV[[variable]],"Phylum"])))
colClass <- polychrome(length(unique(tax[difESV[[variable]],"Class"])))
names(colSpecies) <- unique(meta$Species)
names(colPopID) <- unique(meta$PopID)
names(colReproductiveModeSimple) <- unique(meta$ReproductiveModeSimple)
names(colPhylum) <- unique(tax[difESV[[variable]],"Phylum"])
names(colClass) <- unique(tax[difESV[[variable]],"Class"])
anno_colors <- list(Species = colSpecies,
PopID = colPopID,
ReproductiveModeSimple = colReproductiveModeSimple,
Phylum = colPhylum,
Class = colClass)
Species_pp <- pheatmap(assay(vst)[difESV[[variable]],],
main=variable,
scale="row",
annotation_col= meta[,c("Species","PopID","ReproductiveModeSimple")],
annotation_row= tax[difESV[[variable]],c("Phylum","Class")],
show_colnames=FALSE,
show_rownames=FALSE,
annotation_colors=anno_colors)
Species_pp
variable <- names(difESV)[2]
# define custom colors
colSpecies <- polychrome(length(unique(meta$Species)))
colPopID <- polychrome(length(unique(meta$PopID)))
colReproductiveModeSimple <- polychrome(length(unique(meta$ReproductiveModeSimple)))
colPhylum <- polychrome(length(unique(tax[difESV[[variable]],"Phylum"])))
colClass <- polychrome(length(unique(tax[difESV[[variable]],"Class"])))
names(colSpecies) <- unique(meta$Species)
names(colPopID) <- unique(meta$PopID)
names(colReproductiveModeSimple) <- unique(meta$ReproductiveModeSimple)
names(colPhylum) <- unique(tax[difESV[[variable]],"Phylum"])
names(colClass) <- unique(tax[difESV[[variable]],"Class"])
anno_colors <- list(Species = colSpecies,
PopID = colPopID,
ReproductiveModeSimple = colReproductiveModeSimple,
Phylum = colPhylum,
Class = colClass)
Sex_pp <- pheatmap(assay(vst)[difESV[[variable]],],
main=variable,
scale="row",
annotation_col= meta[,c("Species","PopID","ReproductiveModeSimple")],
annotation_row= tax[difESV[[variable]],c("Phylum","Class")],
show_colnames=FALSE,
show_rownames=FALSE,
annotation_colors=anno_colors)
Sex_pp
OK as suspected, this is very sparse on data, thus it might be advisable to cluster the data at some level of phylogeny. So lets do this.
# rename strange names to unknown, where basically unknown is suitable
unknown <- c("uncultured", "metagenome","Unassigned")
tax2 <- tax
for(clm in colnames(tax2)){
if(clm != "rep.seq"){
x <- tax2[,clm]
tax2[is.na(x),clm] <- "unknown"
for(pattern in unknown){
sel <- grep(pattern, x)
tax2[sel,clm] <- "unknown"
}
}
}
# merge into phylogeny into new vector for easier access
for(i in 2:7){
nm <- paste0(colnames(tax2)[i],"_phylo")
tax2[[nm]] <- apply(tax2[,1:i],1,paste, collapse="_")
}
# now lets sum the reads per unique phylo description
clustered_otu <- list()
for(clm in grep("_phylo",names(tax2), value =TRUE)){
otu_mat <- matrix(0, ncol = ncol(otu), nrow= length(unique(tax2[,clm])),
dimnames = list(unique(tax2[,clm]), colnames(otu)))
for(taxa in unique(tax2[,clm])){
ids <- rownames(tax2)[tax2[,clm] == taxa]
otu_mat[taxa,] <- apply(otu[ids,],2,sum)
}
clustered_otu[[clm]] <- otu_mat
}
# run deseq on the clustered data
deseqPhylo <- list()
for(phylo in names(clustered_otu)){
dds <- DESeqDataSetFromMatrix(clustered_otu[[phylo]][,rownames(meta)]+1, colData = meta, design = ~ReproductiveModeSimple + Species + PopID)
deseq <- DESeq(dds)
deseqPhylo[[phylo]] <- deseq
}
# get the differentials for Reproductive mode and Species
resultsPhylo <- list()
for(phylo in names(deseqPhylo)){
deseq <- deseqPhylo[[phylo]]
print(phylo)
resultsList <- list()
combSpecies <- combn(unique(colData(deseq)[["Species"]]),2)
combSex <- combn(unique(colData(deseq)[["ReproductiveModeSimple"]]),2)
combSS <- list(Species = combSpecies,ReproductiveModeSimple = combSex)
for(comb in names(combSS)){
combS <- combSS[[comb]]
rslt <- list()
for(i in 1:ncol(combS)){
rslt[[paste(combS[,i], collapse = "_vs_")]]<- results(deseq,
contrast = c(comb,
as.character(combS[1,i]),as.character(combS[2,i])),
alpha=0.05)
}
resultsList[[comb]] <- rslt
}
#
for(variable in names(resultsList)){
for(comparison in names(resultsList[[variable]])){
print(paste(variable, comparison))
print(summary(resultsList[[variable]][[comparison]]))
}
}
resultsPhylo[[phylo]] <- resultsList
}
## [1] "Phylum_phylo"
## [1] "Species CIR_vs_OLI"
##
## out of 21 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 1, 4.8%
## LFC < 0 (down) : 5, 24%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Species CIR_vs_VUL"
##
## out of 21 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 0, 0%
## LFC < 0 (down) : 1, 4.8%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Species OLI_vs_VUL"
##
## out of 21 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 1, 4.8%
## LFC < 0 (down) : 1, 4.8%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple SEX_vs_NR"
##
## out of 21 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 1, 4.8%
## LFC < 0 (down) : 0, 0%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple SEX_vs_ASEX"
##
## out of 21 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 3, 14%
## LFC < 0 (down) : 0, 0%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple NR_vs_ASEX"
##
## out of 21 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 0, 0%
## LFC < 0 (down) : 0, 0%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Class_phylo"
## [1] "Species CIR_vs_OLI"
##
## out of 46 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 1, 2.2%
## LFC < 0 (down) : 8, 17%
## outliers [1] : 1, 2.2%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Species CIR_vs_VUL"
##
## out of 46 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 2, 4.3%
## LFC < 0 (down) : 4, 8.7%
## outliers [1] : 1, 2.2%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Species OLI_vs_VUL"
##
## out of 46 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 0, 0%
## LFC < 0 (down) : 1, 2.2%
## outliers [1] : 1, 2.2%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple SEX_vs_NR"
##
## out of 46 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 1, 2.2%
## LFC < 0 (down) : 0, 0%
## outliers [1] : 1, 2.2%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple SEX_vs_ASEX"
##
## out of 46 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 6, 13%
## LFC < 0 (down) : 2, 4.3%
## outliers [1] : 1, 2.2%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple NR_vs_ASEX"
##
## out of 46 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 0, 0%
## LFC < 0 (down) : 0, 0%
## outliers [1] : 1, 2.2%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Order_phylo"
## [1] "Species CIR_vs_OLI"
##
## out of 114 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 3, 2.6%
## LFC < 0 (down) : 21, 18%
## outliers [1] : 2, 1.8%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Species CIR_vs_VUL"
##
## out of 114 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 2, 1.8%
## LFC < 0 (down) : 16, 14%
## outliers [1] : 2, 1.8%
## low counts [2] : 18, 16%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Species OLI_vs_VUL"
##
## out of 114 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 12, 11%
## LFC < 0 (down) : 3, 2.6%
## outliers [1] : 2, 1.8%
## low counts [2] : 47, 41%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple SEX_vs_NR"
##
## out of 114 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 9, 7.9%
## LFC < 0 (down) : 2, 1.8%
## outliers [1] : 2, 1.8%
## low counts [2] : 25, 22%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple SEX_vs_ASEX"
##
## out of 114 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 10, 8.8%
## LFC < 0 (down) : 2, 1.8%
## outliers [1] : 2, 1.8%
## low counts [2] : 27, 24%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple NR_vs_ASEX"
##
## out of 114 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 2, 1.8%
## LFC < 0 (down) : 1, 0.88%
## outliers [1] : 2, 1.8%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Family_phylo"
## [1] "Species CIR_vs_OLI"
##
## out of 201 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 4, 2%
## LFC < 0 (down) : 35, 17%
## outliers [1] : 4, 2%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Species CIR_vs_VUL"
##
## out of 201 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 3, 1.5%
## LFC < 0 (down) : 20, 10%
## outliers [1] : 4, 2%
## low counts [2] : 77, 38%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Species OLI_vs_VUL"
##
## out of 201 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 19, 9.5%
## LFC < 0 (down) : 5, 2.5%
## outliers [1] : 4, 2%
## low counts [2] : 4, 2%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple SEX_vs_NR"
##
## out of 201 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 8, 4%
## LFC < 0 (down) : 3, 1.5%
## outliers [1] : 4, 2%
## low counts [2] : 73, 36%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple SEX_vs_ASEX"
##
## out of 201 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 19, 9.5%
## LFC < 0 (down) : 2, 1%
## outliers [1] : 4, 2%
## low counts [2] : 73, 36%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple NR_vs_ASEX"
##
## out of 201 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 3, 1.5%
## LFC < 0 (down) : 0, 0%
## outliers [1] : 4, 2%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Genus_phylo"
## [1] "Species CIR_vs_OLI"
##
## out of 323 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 7, 2.2%
## LFC < 0 (down) : 47, 15%
## outliers [1] : 11, 3.4%
## low counts [2] : 106, 33%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Species CIR_vs_VUL"
##
## out of 323 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 6, 1.9%
## LFC < 0 (down) : 28, 8.7%
## outliers [1] : 11, 3.4%
## low counts [2] : 159, 49%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Species OLI_vs_VUL"
##
## out of 323 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 30, 9.3%
## LFC < 0 (down) : 8, 2.5%
## outliers [1] : 11, 3.4%
## low counts [2] : 19, 5.9%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple SEX_vs_NR"
##
## out of 323 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 13, 4%
## LFC < 0 (down) : 7, 2.2%
## outliers [1] : 11, 3.4%
## low counts [2] : 142, 44%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple SEX_vs_ASEX"
##
## out of 323 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 19, 5.9%
## LFC < 0 (down) : 1, 0.31%
## outliers [1] : 11, 3.4%
## low counts [2] : 88, 27%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple NR_vs_ASEX"
##
## out of 323 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 7, 2.2%
## LFC < 0 (down) : 1, 0.31%
## outliers [1] : 11, 3.4%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Species_phylo"
## [1] "Species CIR_vs_OLI"
##
## out of 323 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 7, 2.2%
## LFC < 0 (down) : 47, 15%
## outliers [1] : 11, 3.4%
## low counts [2] : 106, 33%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Species CIR_vs_VUL"
##
## out of 323 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 6, 1.9%
## LFC < 0 (down) : 28, 8.7%
## outliers [1] : 11, 3.4%
## low counts [2] : 159, 49%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "Species OLI_vs_VUL"
##
## out of 323 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 30, 9.3%
## LFC < 0 (down) : 8, 2.5%
## outliers [1] : 11, 3.4%
## low counts [2] : 19, 5.9%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple SEX_vs_NR"
##
## out of 323 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 13, 4%
## LFC < 0 (down) : 7, 2.2%
## outliers [1] : 11, 3.4%
## low counts [2] : 142, 44%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple SEX_vs_ASEX"
##
## out of 323 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 19, 5.9%
## LFC < 0 (down) : 1, 0.31%
## outliers [1] : 11, 3.4%
## low counts [2] : 88, 27%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] "ReproductiveModeSimple NR_vs_ASEX"
##
## out of 323 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 7, 2.2%
## LFC < 0 (down) : 1, 0.31%
## outliers [1] : 11, 3.4%
## low counts [2] : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
OK now we have results for all the different phylogentic levels. Lets plot these again.
clmns <- names(tax2)[1:7]
effectPhylo <- list()
for(phylo in names(resultsPhylo)){
resultsList <- resultsPhylo[[phylo]]
SpeciesSexPlot <- list()
#
for(cdt in names(resultsList)){
cdtPlots <- list()
#
for(comp in names(resultsList[[cdt]])){
compR <- as.data.frame(resultsList[[cdt]][[comp]])
fl <- compR
fl <- fl[fl[["padj"]] < 0.05 & !is.na(fl[["padj"]]),]
if(nrow(fl) >0){
fl[["Level"]] <-sapply(rownames(fl),
function(x) {
length(strsplit(x, split ="_")[[1]])
}
)
#
fnLev <- clmns[fl[1,"Level"]]
fl[[fnLev]] <- paste(1:nrow(fl),
sapply(rownames(fl),
function(x) {
y <- strsplit(x, split ="_")[[1]]
return(y[length(y)])
}
),
sep = "_")
#
preLev <- clmns[fl[1,"Level"]-1]
fl[[preLev]] <- sapply(rownames(fl),
function(x) {
y <- strsplit(x, split ="_")[[1]]
return(y[length(y)-1])})
#
fl[["Phylum"]]<- sapply(rownames(fl),
function(x) {
y <- strsplit(x, split ="_")[[1]]
return(y[2])})
#
xlb <- paste(collapse = " ", strsplit(comp, split = "_")[[1]][c(3,2,1)])
pp <- ggplot(fl, aes_string(x="log2FoldChange", y = fnLev, fill = preLev, color = "Phylum")) +
geom_bar(stat ="identity") +
geom_errorbar(aes(xmin = log2FoldChange - lfcSE, xmax = log2FoldChange + lfcSE))+
xlab(xlb) +
ggtitle(phylo) +
scale_color_manual(values = c(as.vector(polychrome()),"white")) +
scale_fill_manual(values = c(as.vector(polychrome()),"white"))
cdtPlots[[comp]] <- pp
}
}
SpeciesSexPlot[[cdt]] <- cowplot::plot_grid(plotlist = cdtPlots, nrow=1)
}
#
print(SpeciesSexPlot[[1]])
print(SpeciesSexPlot[[2]])
effectPhylo[[phylo]] <- SpeciesSexPlot
}
This is nice (a little colorful though). Lets see whether heatmaps look better on these here.
for(phylo in names(deseqPhylo)){
resultsList <- resultsPhylo[[phylo]]
deseq <- deseqPhylo[[phylo]]
difESV <- list()
for(n in names(resultsList)){
for(comp in resultsList[[n]]){
compSel <- comp$padj < 0.05 & !is.na(comp$padj)
difESV[[n]] <- unique(c(difESV[[n]], rownames(comp[compSel,])))
}
}
vst <- varianceStabilizingTransformation(deseq, fitType='local')
variable <- names(difESV)[1]
# define custom colors
colSpecies <- polychrome(length(unique(meta$Species)))
colPopID <- polychrome(length(unique(meta$PopID)))
colReproductiveModeSimple <- polychrome(length(unique(meta$ReproductiveModeSimple)))
colPhylum <- polychrome(length(unique(tax2[tax2[[phylo]] %in% difESV[[variable]],"Phylum"])))
colClass <- polychrome(length(unique(tax2[tax2[[phylo]] %in% difESV[[variable]],"Class"])))
names(colSpecies) <- unique(meta$Species)
names(colPopID) <- unique(meta$PopID)
names(colReproductiveModeSimple) <- unique(meta$ReproductiveModeSimple)
names(colPhylum) <- unique(tax2[tax2[[phylo]] %in% difESV[[variable]],"Phylum"])
names(colClass) <- unique(tax2[tax2[[phylo]] %in% difESV[[variable]],"Class"])
anno_colors <- list(Species = colSpecies,
PopID = colPopID,
ReproductiveModeSimple = colReproductiveModeSimple,
Phylum = colPhylum,
Class = colClass)
tax3 <- tax2[!duplicated(tax2[[phylo]]),]
rownames(tax3) <- tax3[[phylo]]
tax3 <- tax3[rownames(vst),]
Species_pp <- pheatmap(assay(vst)[difESV[[variable]],],
main=paste(phylo, variable),
scale="row",
annotation_col= meta[,c("Species","PopID","ReproductiveModeSimple")],
annotation_row= tax3[tax3[[phylo]] %in% difESV[[variable]],c("Phylum","Class")],
show_colnames=FALSE,
show_rownames=FALSE,
annotation_colors=anno_colors)
Species_pp
variable <- names(difESV)[2]
# define custom colors
colSpecies <- polychrome(length(unique(meta$Species)))
colPopID <- polychrome(length(unique(meta$PopID)))
colReproductiveModeSimple <- polychrome(length(unique(meta$ReproductiveModeSimple)))
colPhylum <- polychrome(length(unique(tax2[tax2[[phylo]] %in% difESV[[variable]],"Phylum"])))
colClass <- polychrome(length(unique(tax2[tax2[[phylo]] %in% difESV[[variable]],"Class"])))
names(colSpecies) <- unique(meta$Species)
names(colPopID) <- unique(meta$PopID)
names(colReproductiveModeSimple) <- unique(meta$ReproductiveModeSimple)
names(colPhylum) <- unique(tax2[tax2[[phylo]] %in% difESV[[variable]],"Phylum"])
names(colClass) <- unique(tax2[tax2[[phylo]] %in% difESV[[variable]],"Class"])
anno_colors <- list(Species = colSpecies,
PopID = colPopID,
ReproductiveModeSimple = colReproductiveModeSimple,
Phylum = colPhylum,
Class = colClass)
tax3 <- tax2[!duplicated(tax2[[phylo]]),]
rownames(tax3) <- tax3[[phylo]]
tax3 <- tax3[rownames(vst),]
Species_pp <- pheatmap(assay(vst)[difESV[[variable]],],
main=paste(phylo, variable),
scale="row",
annotation_col= meta[,c("Species","PopID","ReproductiveModeSimple")],
annotation_row= tax3[tax3[[phylo]] %in% difESV[[variable]],c("Phylum","Class")],
show_colnames=FALSE,
show_rownames=FALSE,
annotation_colors=anno_colors)
Species_pp
}